---
title: 'Section 8: Using Ancillary Assumptions to Identify Punishment Mechanisms from Aggregate Data'
author: "Milan Svolik"
date: "4/20/2023"
output:
  pdf_document: default
  html_document:
    df_print: paged
---

```{r setup, include=FALSE}
rm(list = ls(all = TRUE))
library(tidyverse)
library(stargazer)
library(lmtest)
library(reshape2)
library(foreign)
library(ggplot2)
library(broom)
library(estimatr)

set.seed(5877)
```

# Tables 8.8 and 8.9
* Election results: Estimates for four exemplar neighborhoods
* Election results: Estimates of the three mechanisms for the four exemplar neighborhoods and Istanbul

# OPTIMIZATION FUNCTIONS
```{R} 
# USING PACKAGE ROI FOR OPTIMIZATION
Sys.setenv(ROI_LOAD_PLUGINS = "FALSE")
library(ROI)
library(ROI.plugin.glpk)
library(ROI.plugin.qpoases)
library(ROI.plugin.ecos)
library(ROI.plugin.alabama)
library(ROI.plugin.lpsolve)

# The argument are the margins from a neighborhood
# The margin sums must be identical; otherwise supply these normalizide as proportions
# The output is a vector of the 9 probabilities

lsolve <- function(round1, round2) {
  akp1 = round1[1]
  chp1 = round1[2]
  abs1 = round1[3]
  akp2 = round2[1]
  chp2 = round2[2]
  abs2 = round2[3]
  
  labels <- c("pi11", "pi12", "pi13", "pi21", "pi22", "pi23", "pi31", "pi32", "pi33")
  lm <- rep(0,9)
  lm[1] <- -akp1
  lm[5] <- -chp1
  lm[9] <- -abs1
  
  lo <- L_objective(lm, labels)
  
  # PROBABILITIES MUST SUM TO ONE
  lc1 <- c(rep(1,3), rep(0, 6))
  lc2 <- c(rep(0, 3), rep(1,3), rep(0, 3))
  lc3 <- c(rep(0, 6), rep(1,3))
  
  # MARGINAL SUMS 
  ms1 <- c(rep(0, 9))
  ms1[1] <- akp1
  ms1[4] <- chp1
  ms1[7] <- abs1
  
  ms2 <- c(rep(0, 9))
  ms2[2] <- akp1
  ms2[5] <- chp1
  ms2[8] <- abs1
  
  ms3 <- c(rep(0, 9))
  ms3[3] <- akp1
  ms3[6] <- chp1
  ms3[9] <- abs1
  
  lc_all <- rbind(lc1, lc2, lc3, ms1, ms2, ms3)
  
  lc <- L_constraint(L = lc_all,
                     dir = rep("==", 6), 
                     rhs = c(rep(1, 3), akp2, chp2, abs2),
                     names = labels)
  
  vb <- V_bound(li = 1:9, ui = 1:9, lb = rep(0,9), ub = rep(1, 9))
  
  lp <- OP(objective=lo, constraints=lc,  bounds=vb)
  sol <- ROI_solve(lp, solver="glpk")
  solution(sol)  
}

# BOOTSTRAP THE OPTIMIZATION SOLUTION
# THE FUNCTION RETURNS THE LOWER AND UPPER 95%CI OF THE 
# round1 AND round2 MUST BE ACTUAL RESULTS (I.E. NOT PROPORTIONS ALONE)
# THE NUMBER OF VOTERS MUST BE THE SAME FOR BOTH ELECTIONS
solve_boot <- function(round1, round2, boot_rep) {
  n_reg <- sum(round1)
  
  # BOOTSTRAP THE MARGINS
  save_round_1 <- c()
  save_round_2 <- c()
  for (i in 1:boot_rep) {
    boot_round_1 <- sample(seq(1,3), n_reg, replace=T, prob=round1)
    boot_round_2 <- sample(seq(1,3), n_reg, replace=T, prob=round2)
    save_round_1 <- rbind(save_round_1, table(boot_round_1))
    save_round_2 <- rbind(save_round_2, table(boot_round_2))
  }

  # SOLVE FOR THE BOOTSTRAPED RESULTS
  boot_sol <- c()
  boot_switching <- c()
  boot_backlash <- c()
  boot_disenchantment <- c()
  
  for (i in 1:boot_rep) {
    round_1 <- save_round_1[i,]
    round_2 <- save_round_2[i,]
    sol <- lsolve(round_1, round_2)
    boot_sol <- rbind(boot_sol, sol)
  
    P <- matrix(sol, nrow=3, byrow=TRUE)
    
    round_1_norm <- round_1/sum(round_1)
    
    switching <- round_1_norm[1]*P[1,2] - round_1_norm[2]*P[2,1]
    boot_switching <- rbind(boot_switching, switching)
    
    backlash <- round_1_norm[3]*P[3,2] - round_1_norm[2]*P[2,3]
    boot_backlash <- rbind(boot_backlash, backlash)
    
    disenchantment <- round_1_norm[1]*P[1,3] - round_1_norm[3]*P[3,1]
    boot_disenchantment <- rbind(boot_disenchantment, disenchantment)
  
  }

  # FIND THE PERCENTILE CI
  P_lower <- apply(boot_sol, 2, quantile, probs=c(.025), na.rm=TRUE)
  P_upper <- apply(boot_sol, 2, quantile, probs=c(.975), na.rm=TRUE)

  switching_lower <- apply(boot_switching, 2, quantile, probs=c(.025), na.rm=TRUE)
  switching_upper <- apply(boot_switching, 2, quantile, probs=c(.975), na.rm=TRUE)
  
  backlash_lower <- apply(boot_backlash, 2, quantile, probs=c(.025), na.rm=TRUE)
  backlash_upper <- apply(boot_backlash, 2, quantile, probs=c(.975), na.rm=TRUE)
  
  disenchantment_lower <- apply(boot_disenchantment, 2, quantile, probs=c(.025), na.rm=TRUE)
  disenchantment_upper <- apply(boot_disenchantment, 2, quantile, probs=c(.975), na.rm=TRUE)
  
  return(list(P_ci_lower=P_lower, P_ci_upper=P_upper, switching_ci=c(switching_lower, switching_upper), backlash_ci=c(backlash_lower, backlash_upper), disenchantment_ci=c(disenchantment_lower, disenchantment_upper)))
  
}
```

# SOLVE FOR "EXEMPLAR NEIGHBORHOODS"
* THE MEDIAN NEIGHBORHOOD
```{R}
round_1 <- c(3170, 3285, 1590)
round_2 <- c(2938, 3787, 1320)
sol <- lsolve(round_1, round_2)

# NORMALIZE
round_1_norm <- round_1/sum(round_1)
round_2_norm <- round_2/sum(round_2)

P <- matrix(sol, nrow=3, byrow=TRUE)
PMF <- matrix(NA, nrow=3, ncol=3)
PMF[1,] <- P[1,]*round_1_norm[1]
PMF[2,] <- P[2,]*round_1_norm[2]
PMF[3,] <- P[3,]*round_1_norm[3]
round(PMF, 2)

# SWITCHING FROM 1 TO 2
switching <- round_1_norm[1]*P[1,2] - round_1_norm[2]*P[2,1]
round(100*switching, 2)

# BACKLASH FROM ABSTAIN TO 2
backlash <- round_1_norm[3]*P[3,2] - round_1_norm[2]*P[2,3]
round(100*backlash, 2)

# DISENCHANTMENT FROM 1 TO ABSTAIN
disenchantment <- round_1_norm[1]*P[1,3] - round_1_norm[3]*P[3,1]
round(100*disenchantment, 2)

mechs <- c(switching, backlash, disenchantment)
round(100*mechs, 2)

# BOOTSTRAP THE CIs
boot_rep <- 1000
boot_sol <- solve_boot(round_1, round_2, boot_rep)
round(100*boot_sol$switching_ci, 2)
round(100*boot_sol$backlash_ci, 2)
round(100*boot_sol$disenchantment_ci, 2)
```

* THE SWITCHING EXEMPLAR
```{R}
round_1 <- c(7159,	6290,	3874)
round_2 <- c(6716,	6729,	3878)
sol <- lsolve(round_1, round_2)

# NORMALIZE
round_1_norm <- round_1/sum(round_1)
round_2_norm <- round_2/sum(round_2)

P <- matrix(sol, nrow=3, byrow=TRUE)
PMF <- matrix(NA, nrow=3, ncol=3)
PMF[1,] <- P[1,]*round_1_norm[1]
PMF[2,] <- P[2,]*round_1_norm[2]
PMF[3,] <- P[3,]*round_1_norm[3]
round(PMF, 2)

# SWITCHING FROM 1 TO 2
switching <- round_1_norm[1]*P[1,2] - round_1_norm[2]*P[2,1]
round(100*switching, 2)

# BACKLASH FROM ABSTAIN TO 2
backlash <- round_1_norm[3]*P[3,2] - round_1_norm[2]*P[2,3]
round(100*backlash, 2)

# DISENCHANTMENT FROM 1 TO ABSTAIN
disenchantment <- round_1_norm[1]*P[1,3] - round_1_norm[3]*P[3,1]
round(100*disenchantment, 2)

mechs <- c(switching, backlash, disenchantment)
round(100*mechs, 2)

# BOOTSTRAP THE CIs
boot_rep <- 1000
boot_sol <- solve_boot(round_1, round_2, boot_rep)
round(100*boot_sol$switching_ci, 2)
round(100*boot_sol$backlash_ci, 2)
round(100*boot_sol$disenchantment_ci, 2)
```

* THE BACKLASH EXEMPLAR
```{R}
round_1 <- c(10030,	15396,	6553)
round_2 <- c(10040,	16650,	5289)
sol <- lsolve(round_1, round_2)

# NORMALIZE
round_1_norm <- round_1/sum(round_1)
round_2_norm <- round_2/sum(round_2)

P <- matrix(sol, nrow=3, byrow=TRUE)
PMF <- matrix(NA, nrow=3, ncol=3)
PMF[1,] <- P[1,]*round_1_norm[1]
PMF[2,] <- P[2,]*round_1_norm[2]
PMF[3,] <- P[3,]*round_1_norm[3]
round(PMF, 2)

# SWITCHING FROM 1 TO 2
switching <- round_1_norm[1]*P[1,2] - round_1_norm[2]*P[2,1]
round(100*switching, 2)

# BACKLASH FROM ABSTAIN TO 2
backlash <- round_1_norm[3]*P[3,2] - round_1_norm[2]*P[2,3]
round(100*backlash, 2)

# DISENCHANTMENT FROM 1 TO ABSTAIN
disenchantment <- round_1_norm[1]*P[1,3] - round_1_norm[3]*P[3,1]
round(100*disenchantment, 2)

mechs <- c(switching, backlash, disenchantment)
round(100*mechs, 2)

# BOOTSTRAP THE CIs
boot_rep <- 1000
boot_sol <- solve_boot(round_1, round_2, boot_rep)
round(100*boot_sol$switching_ci, 2)
round(100*boot_sol$backlash_ci, 2)
round(100*boot_sol$disenchantment_ci, 2)
```

* THE DISENCHANTMENT EXEMPLAR
```{R}
round_1 <- c(698,	213,	103)
round_2 <- c(638,	209,	167)
sol <- lsolve(round_1, round_2)

# NORMALIZE
round_1_norm <- round_1/sum(round_1)
round_2_norm <- round_2/sum(round_2)

P <- matrix(sol, nrow=3, byrow=TRUE)
PMF <- matrix(NA, nrow=3, ncol=3)
PMF[1,] <- P[1,]*round_1_norm[1]
PMF[2,] <- P[2,]*round_1_norm[2]
PMF[3,] <- P[3,]*round_1_norm[3]
round(PMF, 2)

# SWITCHING FROM 1 TO 2
switching <- round_1_norm[1]*P[1,2] - round_1_norm[2]*P[2,1]
round(100*switching, 2)

# BACKLASH FROM ABSTAIN TO 2
backlash <- round_1_norm[3]*P[3,2] - round_1_norm[2]*P[2,3]
round(100*backlash, 2)

# DISENCHANTMENT FROM 1 TO ABSTAIN
disenchantment <- round_1_norm[1]*P[1,3] - round_1_norm[3]*P[3,1]
round(100*disenchantment, 2)

mechs <- c(switching, backlash, disenchantment)
round(100*mechs, 2)

# BOOTSTRAP THE CIs
boot_rep <- 1000
boot_sol <- solve_boot(round_1, round_2, boot_rep)
round(100*boot_sol$switching_ci, 2)
round(100*boot_sol$backlash_ci, 2)
round(100*boot_sol$disenchantment_ci, 2)
```
### SOLVE FOR ALL NEIGHBORHOODS
```{R eval=FALSE}
load("data/neighborhoods_transitions.RData")

march_votes <- matrix(as.integer(as.matrix(results_neighbourhood_save[, c(3,4,5)])), nrow=nrow(results_neighbourhood_save))
june_votes <- matrix(as.integer(as.matrix(results_neighbourhood_save[, c(6,7,8)])), nrow=nrow(results_neighbourhood_save))

solutions <- c()
for (i in 1:nrow(results_neighbourhood_save)) {
  round1 <- march_votes[i,]
  round2 <- june_votes[i,] 
  solutions <- rbind(solutions, lsolve(round1, round2))
  }

solutions_neighborhoods <- cbind(ungroup(results_neighbourhood_save), solutions)
save(solutions_neighborhoods, file ="data/solutions_neighborhoods.RData")
```
### EXAMINE THE NEIGHBORHOOD DATA SOLUTIONS
```{R}
load("data/solutions_neighborhoods.RData")
results <- solutions_neighborhoods[,1:8]
akp_reg_march <- results$AKP_march/(results$AKP_march + results$CHP_march + results$ABS_march)
chp_reg_march <- results$CHP_march/(results$AKP_march + results$CHP_march + results$ABS_march)
abs_reg_march <- results$ABS_march/(results$AKP_march + results$CHP_march + results$ABS_march)
akp_reg_june <- results$AKP_june/(results$AKP_june + results$CHP_june + results$ABS_june)
chp_reg_june <- results$CHP_june/(results$AKP_june + results$CHP_june + results$ABS_june)
abs_reg_june <- results$ABS_june/(results$AKP_june + results$CHP_june + results$ABS_june)

akp_reg_diff <- akp_reg_june - akp_reg_march
chp_reg_diff <- chp_reg_june - chp_reg_march
abs_reg_diff <- abs_reg_june - abs_reg_march

akp_two_march <- results$AKP_march/(results$AKP_march + results$CHP_march)

round_1_norm <- cbind(akp_reg_march, chp_reg_march, abs_reg_march)
round_2_norm <- cbind(akp_reg_june, chp_reg_june, abs_reg_june)

# TRANSITION MATRIX AS ROW VECTORS
P1X <- cbind(pis[,1], pis[,2], pis[,3])
P1X_min <- apply(P1X, 2, quantile, na.rm=TRUE, prob=.025)
P1X_max <- apply(P1X, 2, quantile, na.rm=TRUE, prob=.975)

P2X <- cbind(pis[,4], pis[,5], pis[,6])
P2X_min <- apply(P2X, 2, quantile, na.rm=TRUE, prob=.025)
P2X_max <- apply(P2X, 2, quantile, na.rm=TRUE, prob=.975)

P3X <- cbind(pis[,7], pis[,8], pis[,9])
P3X_min <- apply(P3X, 2, quantile, na.rm=TRUE, prob=.025)
P3X_max <- apply(P3X, 2, quantile, na.rm=TRUE, prob=.975)

P_min <- rbind(P1X_min, P2X_min, P3X_min)
P_max <- rbind(P1X_max, P2X_max, P3X_max)


# SWITCHING FROM 1 TO 2
switching <- round_1_norm[,1]*P1X[,2] - round_1_norm[,2]*P2X[,1]
summary(switching)

# BACKLASH FROM ABSTAIN TO 2
backlash <- round_1_norm[,3]*P3X[,2] - round_1_norm[,2]*P2X[,3]
summary(backlash)

# DISENCHANTMENT FROM 1 TO ABSTAIN
disenchantment <- round_1_norm[,1]*P1X[,3] - round_1_norm[,3]*P3X[,1]
summary(disenchantment)

# WEIGHT THE ESTIMATES BY THE NUMBER OF REGISTERED VOTERS
registered <- results$AKP_march + results$CHP_march + results$ABS_march
overall <- apply(registered*cbind(switching, backlash, disenchantment), 2, sum)
mechs <- overall/sum(registered)
mechs
```

### BOOTSTRAP ALL NEIGHBORHOODS TO GET THE CONFIDENCE INTERVALS
* EACH ENTRY IN boot_solutions IS A COMPLETE SET OF SOLUTIONS PER BOOT DRAW
```{R eval=FALSE}
boot_rep <- 10000
boot_solutions <- vector("list", boot_rep)

load("neighborhoods_transitions.RData")

march_votes <- matrix(as.integer(as.matrix(results_neighbourhood_save[, c(3,4,5)])), nrow=nrow(results_neighbourhood_save))
june_votes <- matrix(as.integer(as.matrix(results_neighbourhood_save[, c(6,7,8)])), nrow=nrow(results_neighbourhood_save))

# START THE CLOCK
start_time <- proc.time()

for (b in 1:boot_rep) {
  solutions <- c()
    for (i in 1:nrow(results_neighbourhood_save)) {
      round1 <- march_votes[i,]
      round2 <- june_votes[i,]
      
      n_reg <- sum(round1)
      
      boot_round_1 <- sample(seq(1,3), n_reg, replace=T, prob=round1)
      boot_round_2 <- sample(seq(1,3), n_reg, replace=T, prob=round2)

      solutions <- rbind(solutions, lsolve(table(boot_round_1), table(boot_round_2)))
    }
  
  boot_solutions[[b]] <- solutions
  print(b)
}

end_time <- proc.time()
sec_time <- end_time-start_time
sec_time/60

save(boot_solutions, file ="data/boot_neighborhoods.RData")
```

### PROCESS THE ALL-NEIGBORHOOD BOOTSTRAP TO GET CONFIDENCE INTERVALS FOR THE AGGREGATE ESTIMATES
* EACH ENTRY IN boot_solutions IS A COMPLETE SET OF SOLUTIONS PER BOOT DRAW
```{R}
load("data/boot_neighborhoods.RData")
load("data/solutions_neighborhoods.RData")
boot_rep <- 10000

# LOAD NEIGHBORHOOD LEVEL RESULTS
results <- solutions_neighborhoods[,1:8]
akp_reg_march <- results$AKP_march/(results$AKP_march + results$CHP_march + results$ABS_march)
chp_reg_march <- results$CHP_march/(results$AKP_march + results$CHP_march + results$ABS_march)
abs_reg_march <- results$ABS_march/(results$AKP_march + results$CHP_march + results$ABS_march)

round_1_norm <- cbind(akp_reg_march, chp_reg_march, abs_reg_march)

switching_boot <- c()
backlash_boot <- c()
disenchantment_boot <- c()

for (b in 1:boot_rep) {
  
  pis <- boot_solutions[[b]]
  
  # TRANSITION MATRIX AS ROW VECTORS
  P1X <- cbind(pis[,1], pis[,2], pis[,3])
  P2X <- cbind(pis[,4], pis[,5], pis[,6])
  P3X <- cbind(pis[,7], pis[,8], pis[,9])

  # SWITCHING FROM 1 TO 2
  switching <- round_1_norm[,1]*P1X[,2] - round_1_norm[,2]*P2X[,1]
  # BACKLASH FROM ABSTAIN TO 2
  backlash <- round_1_norm[,3]*P3X[,2] - round_1_norm[,2]*P2X[,3]
  # DISENCHANTMENT FROM 1 TO ABSTAIN
  disenchantment <- round_1_norm[,1]*P1X[,3] - round_1_norm[,3]*P3X[,1]

  # AGGREGATE AT THE CITY LEVEL
  registered <- results$AKP_march + results$CHP_march + results$ABS_march
  overall <- apply(registered*cbind(switching, backlash, disenchantment), 2, sum, na.rm=TRUE)
  mechs <- overall/sum(registered)
  
  switching_boot <- rbind(switching_boot, mechs[1])
  backlash_boot <- rbind(backlash_boot, mechs[2])
  disenchantment_boot <- rbind(disenchantment_boot, mechs[3])
}

round(100*quantile(switching_boot, probs=c(.025, .975), na.rm=TRUE),2)
round(100*quantile(backlash_boot, probs=c(.025, .975), na.rm=TRUE),2)
round(100*quantile(disenchantment_boot, probs=c(.025, .975), na.rm=TRUE),2)
```

# Figure 8.29
* Election results: Vote switching, backlash, and disengagement by neighborhood-level partisanship
```{R}
rm(list=ls(all=TRUE))
load("data/solutions_neighborhoods.RData")

# NEIGHBORHOOD LEVEL RESULTS
results <- solutions_neighborhoods[,1:8]
akp_reg_march <- results$AKP_march/(results$AKP_march + results$CHP_march + results$ABS_march)
chp_reg_march <- results$CHP_march/(results$AKP_march + results$CHP_march + results$ABS_march)
abs_reg_march <- results$ABS_march/(results$AKP_march + results$CHP_march + results$ABS_march)
akp_reg_june <- results$AKP_june/(results$AKP_june + results$CHP_june + results$ABS_june)
chp_reg_june <- results$CHP_june/(results$AKP_june + results$CHP_june + results$ABS_june)
abs_reg_june <- results$ABS_june/(results$AKP_june + results$CHP_june + results$ABS_june)

akp_reg_diff <- akp_reg_june - akp_reg_march
chp_reg_diff <- chp_reg_june - chp_reg_march
abs_reg_diff <- abs_reg_june - abs_reg_march

akp_two_march <- results$AKP_march/(results$AKP_march + results$CHP_march)

round_1_norm <- cbind(akp_reg_march, chp_reg_march, abs_reg_march)
round_2_norm <- cbind(akp_reg_june, chp_reg_june, abs_reg_june)

pis <- solutions_neighborhoods[,9:17]
P1X <- cbind(pis[,1], pis[,2], pis[,3])
P2X <- cbind(pis[,4], pis[,5], pis[,6])
P3X <- cbind(pis[,7], pis[,8], pis[,9])

switching <- round_1_norm[,1]*P1X[,2] - round_1_norm[,2]*P2X[,1]
backlash <- round_1_norm[,3]*P3X[,2] - round_1_norm[,2]*P2X[,3]
disenchantment <- round_1_norm[,1]*P1X[,3] - round_1_norm[,3]*P3X[,1]

df_plot <- data.frame(rbind(cbind(akp_two_march, switching, 1), cbind(akp_two_march, backlash, 2), 
                            cbind(akp_two_march, disenchantment, 3)))
colnames(df_plot) <- c("akp_two_march", "estimate", "mech")
dim(df_plot)

data_plot <- df_plot %>%
  group_by(pct=cut(akp_two_march, c(seq(0, .7, .1), 1)), mech) %>%
  summarize(
        mean = mean(estimate, na.rm=T),
        se   =  sd(estimate, na.rm=T) / sqrt(sum(!is.na(estimate))),
        n = n()
  ) %>%
    mutate(ci_lower = ifelse(mean-1.96*se>-1, mean-1.96*se, -1)) %>%
    mutate(ci_upper = ifelse(mean+1.96*se<1, mean+1.96*se, 1)) %>%
    mutate(mech=as.factor(mech))
  
group_labels <- c("vote switching", "backlash", "disenchantment")
ggplot(data_plot, aes(x=pct, y=mean), col=mech) +
  geom_point(aes(col=mech, shape=mech)) +
  geom_line(aes(col=mech, group=mech, linetype = mech),  size=1) + 
  scale_shape_manual(name  ="", values=c(1,0,2), labels=group_labels) +
  scale_linetype_manual(name  ="", values=c("dashed", "solid", "dotted"), labels=group_labels)+
  scale_color_manual(name  ="", values=c("black", "red", "blue"), labels=group_labels) +
  geom_errorbar(aes(ymin=ci_lower, ymax=ci_upper, col=mech), width=0.1) +
  theme_minimal() +
  scale_x_discrete(name = "% AKP two-party vote share in March 2019",
        labels=seq(15, 75, 10)) +
  scale_y_continuous(name = "% of registered voters",
        labels=seq(0, 10, 2),
        breaks=seq(0, .1, .02))

ggsave("figures, appendix/mechanisms by AKP two party vote share.pdf", width = 9, height = 6, units = "in")
```

